Overall

Row

Total Number of Counties

3,143

Number of Counties with more than 50% Broadband Acess

Number of Counties with more than 25% Broadband Usage

Row

Broadband Access in the Different Counties

Broadband Usage in the Different Counties

Relationships

Column

Relationship Between Broadband Usage and Broadband Availability

Column

Relationship Between Broadband Usage and Population Size

Relationship Between Broadband Usage and Median Income

Column

Relationship Between Broadband Usage and Population Density

Relationship Between Broadband Usage and Occupied Housing Units

---
title: "US Broadband"
author: "Antony Rono"
output: 
  flexdashboard::flex_dashboard:
    orientation: rows
    vertical_layout: fill
    source_code: embed
editor_options: 
  chunk_output_type: console
---

```{r setup, include=FALSE}

knitr::opts_chunk$set(echo = FALSE)

library(flexdashboard)
library(tidyverse)
library(tidytuesdayR)
library(scales)
library(extrafont)
library(zipcodeR)
library(janitor)
library(magrittr)
library(plotly)
library(tigris)
library(ggthemes)
library(highcharter)


options(tigris_use_cache = TRUE)
options(scipen = 99)
theme_set(theme_light())
```

```{r Load, include=FALSE}

tt <- tt_load("2021-05-11")

broadband <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-05-11/broadband.csv')

broadband_zip <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-05-11/broadband_zip.csv')


```

```{r Cleaning Data, include=FALSE}

broadband_county <- broadband %>% 
  
  clean_names() %>% 
  
  mutate(state = state.name[match(st, state.abb)],
         
         state = ifelse(st == "DC", "District of Columbia", state),
         
         broadband_availability_per_fcc = parse_number(broadband_availability_per_fcc, na = "-"),
         
         broadband_usage = parse_number(broadband_usage, na = "-"),
         
         county_id = sprintf("%05d", county_id)
         
         ) %>% 
  
  mutate_at(c("county_name", "state"), str_trim)

skimr::skim(broadband)
  

```

```{r Adding County Population to broadband df, include=FALSE}

library(readxl)

population <- read_xlsx("US Population By County.xlsx", skip = 3)

population %<>% 
  
  rename(county_name = ...1) %>% 
  
  clean_names() %>% 
  
  filter(!is.na(census)) %>% 
  
  mutate(county_name = str_remove(county_name, "^[.]")) %>% 
  
  separate(county_name, c("county_name", "state"), sep = ",") %>% 
  
  mutate_at(c("county_name", "state"), str_trim) %>% 
  
  select(county_name, state, population_2017 = x2017, population_2019 = x2019) %>% 
  
  mutate(county_name = case_when(
    
    county_name == "Petersburg Borough" ~ "Petersburg Census Area",
    
    county_name == "Oglala Lakota County" ~ "Oglala County",
    
    TRUE ~ county_name
    
    
  ))
  


broadband_with_population <- broadband_county %>% 
  
  inner_join(population, by = c("county_name", "state")) %>% 
  
  mutate(county_state = paste0(str_remove(county_name, " County"),", ", state))
  
  
```

```{r Cleaning and Joining with zip code db, include=FALSE}

broadband_zip %<>% 
  
  clean_names()

skimr::skim(broadband_zip)

broadband_zip_joined <- broadband_zip %>% 
  
  mutate(postal_code = sprintf("%05d", postal_code)) %>% 
  
  left_join(zip_code_db, by = c("postal_code" = "zipcode")) %>% 
  
   mutate(state = state.name[match(st, state.abb)],
         
         state = ifelse(st == "DC", "District of Columbia", state)
   ) %>% 
  
  mutate(city_state = paste0(major_city,", ", state)) %>% 
  
  select(-county) 

```

Overall {data-icon="fa-map-marker-alt"}
=======================================================================


Row {data-height=250}
-----------------------------------------------------------------------

### Total Number of Counties

```{r count of counties}


valueBox(value = comma(nrow(broadband_county %>% distinct(county_id))), caption = "Number of Counties", icon = "fa-map-pin", color = "green")


```

### Number of Counties with more than 50% Broadband Acess

```{r valuebox of broadband access}

gauge(broadband_county %>% filter(broadband_availability_per_fcc > 0.5) %>%  nrow,
      min = 0, 
      max = nrow(broadband_county), 
      gaugeSectors(success = c(0.75*nrow(broadband_county), nrow(broadband_county)), 
                   warning = c(0.5*nrow(broadband_county),(0.75*nrow(broadband_county)-1)), 
                   danger = c(0, 0.5*nrow(broadband_county)))
      )

```

### Number of Counties with more than 25% Broadband Usage

```{r valuebox of broadband usage}

gauge(broadband_county %>% filter(broadband_usage > 0.25) %>%  nrow,
      min = 0, 
      max = nrow(broadband_county), 
      gaugeSectors(success = c(0.75*nrow(broadband_county), nrow(broadband_county)), 
                   warning = c(0.5*nrow(broadband_county),(0.75*nrow(broadband_county)-1)), 
                   danger = c(0, 0.5*nrow(broadband_county)))
      )

```


Row{data-height=750}
-----------------------------------------------------------------------

### Broadband Access in the Different Counties

```{r Broadband Access in Different Counties}

hcmap(
  
   map = "countries/us/us-all-all-highres", # high resolution world map
  
   data = broadband_with_population %>% rename(fips = county_id) , # name of dataset
  
   joinBy = "fips",
  
   value = "broadband_availability_per_fcc",
  
   showInLegend = FALSE, # hide legend
  
   nullColor = "#FFFFFF",
  
   download_map_data = TRUE,
   
   dataLabels = list(enabled = TRUE, format = '{point.name}')

   ) %>% 
  
  #hc_colorAxis(minColor = "#FFFFCC", maxColor = "blue") %>% 
   
    hc_mapNavigation(enabled = FALSE) %>%
  
   hc_legend("none")
```


### Broadband Usage in the Different Counties

```{r Broadband Usage in Different Counties}

hcmap(
  
   map = "countries/us/us-all-all-highres", # high resolution world map
  
   data = broadband_with_population %>% rename(fips = county_id) , # name of dataset
  
   joinBy = "fips",
  
   value = "broadband_usage",
  
   showInLegend = FALSE, # hide legend
  
   nullColor = "#FFFFFF",
  
   download_map_data = TRUE,
   
   dataLabels = list(enabled = TRUE, format = '{point.name}')

   ) %>% 
  
  #hc_colorAxis(minColor = "#FFFFCC", maxColor = "blue") %>% 
   
    hc_mapNavigation(enabled = FALSE) %>%
  
   hc_legend("none")

```


Relationships {data-orientation=columns data-icon="fa-chart-line"}
=======================================================================

Column
-----------------------------------------------------------------------

### Relationship Between Broadband Usage and Broadband Availability

```{r Broadband Usage by Broadband Availability}

p <- broadband_with_population %>% 
  
  ggplot(aes(broadband_availability_per_fcc,broadband_usage, color = county_state )) +
  
  geom_point() +
  
  geom_smooth(color = "blue", method = "loess")+
  
  theme(legend.position = "none") +
  
  labs(y = "Broadband Usage in 2019",
       
       x = "Broadband Availability in 2017")


p


```

Column
-----------------------------------------------------------------------

### Relationship Between Broadband Usage and Population Size

```{r Broadband Usage by Population Size}

p <- broadband_with_population %>% 
  
  ggplot(aes( broadband_usage,population_2019, color = county_state )) +
  
  geom_point() +
  
  geom_smooth(color = "blue", method = "lm")+
  
  theme(legend.position = "none") +
  
  scale_x_continuous(labels = percent) +
  
  scale_y_log10(labels = comma) +
  
  coord_flip() +
  
  expand_limits(y = 10000)+
  
  labs(x = "Broadband Usage in 2019",
       
       y = "Population in 2019 (Log Scale)")

p

```


### Relationship Between Broadband Usage and Median Income

```{r Broadband Usage by HH Income}

p <- broadband_zip_joined %>% 
  
  filter(!is.na(median_household_income)) %>% 
  
  ggplot(aes(median_household_income, broadband_usage , color = city_state )) +
  
  geom_point() +
  
  geom_smooth(color = "blue", method = "lm")+
  
  scale_x_log10(labels = comma) +
  
  scale_y_continuous(labels = percent, limits = c(0,1)) +
  
  theme(legend.position = "none") +
  
  labs(y = "Broadband Usage in 2020",
       
       x = "Median Household Income")

p

```


Column
-----------------------------------------------------------------------

### Relationship Between Broadband Usage and Population Density

```{r Broadband Usage by Population density}

p <- broadband_zip_joined %>% 
  
  filter(population > quantile(population, 0.2, na.rm = TRUE)) %>% 
  
  ggplot(aes(population_density, broadband_usage , color = city_state )) +
  
  geom_point() +
  
  geom_smooth(color = "blue", size =2, method = "lm")+
  
  scale_x_log10(labels = comma) +
  
  scale_y_continuous(labels = percent) +
  
  
  theme(legend.position = "none") +
  
  labs(y = "Broadband Usage in 2020",
       
       x = "Population Density (Log Scale)")

p

```

### Relationship Between Broadband Usage and Occupied Housing Units

```{r, Broadband Usage by Occupied Housing Units}

p <- broadband_zip_joined %>% 
  
  filter(!is.na(occupied_housing_units), occupied_housing_units > 0) %>% 
  
  ggplot(aes(occupied_housing_units, broadband_usage , color = city_state )) +
  
  geom_point() +
  
  geom_smooth(color = "blue", method = "lm")+
  
  scale_x_log10(labels = comma) +
  
  scale_y_continuous(labels = percent, limits = c(0,1)) +
  
  theme(legend.position = "none") +
  
  labs(y = "Broadband Usage in 2020",
       
       x = "Occupied Housing Units")

p

```